X-ray dataset used: https://www.kaggle.com/datasets/ahemateja19bec1025/covid-xray-dataset
# Make necessary imports
from PIL import Image, ImageEnhance, ImageDraw
import numpy as np
from random import shuffle
from IPython.core.display import HTML
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import base64
from io import BytesIO
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from seaborn import heatmap, countplot
# RGB colour constants used throughout the pipeline.
# `red` is the flood-fill replacement colour; it is deliberately not part of
# the posterization palette, so red pixels are ignored by the pixel counter.
red = (255, 0, 0)
black, grey, white = (0, 0, 0), (128, 128, 128), (255, 255, 255)

# Palette the images are reduced to, and the feature index of each palette colour.
segmented_colors = [black, grey, white]
indices = {color: position for position, color in enumerate(segmented_colors)}

# Default factor handed to ImageEnhance.Contrast(...).enhance().
contrast_factor = 3
def img_to_base64(img):
    """Return ``img`` encoded as a JPEG ``data:`` URI string.

    ``img`` must provide ``save(file_obj, format=...)``; it is written as
    JPEG into an in-memory buffer whose bytes are then base64-encoded.
    """
    with BytesIO() as jpeg_stream:
        img.save(jpeg_stream, format="JPEG")
        encoded = base64.b64encode(jpeg_stream.getvalue())
    return "data:image/jpeg;base64," + encoded.decode()
def display_colors(colors):
    """Render each colour in ``colors`` as a 50x50 px swatch in the notebook output.

    Every colour is indexed at positions 0, 1 and 2 and each component is
    truncated to ``int`` before being placed into a CSS ``rgb(...)`` value.
    """
    swatches = (
        f'<div style="background-color:rgb({int(color[0])},{int(color[1])},{int(color[2])}); display:inline-block; margin:2px; border:2px solid #888; height:50px; width:50px;"></div>'
        for color in colors
    )
    display(HTML(''.join(swatches)))
def display_images(images):
    """Show ``images`` side by side (200 px wide each) in the notebook output.

    Each image is embedded inline as a base64 JPEG data URI, so nothing is
    written to disk.
    """
    tags = [
        f'<img src="{img_to_base64(image)}" style="width:200px; object-fit:contain; display:inline-block; margin:1px;">'
        for image in images
    ]
    display(HTML(''.join(tags)))
# Preview the four colours in play: red (the flood-fill replacement colour)
# followed by the three posterization colours.
display_colors([red, black, grey, white])
def counter(img, d):
    """Count how many pixels of ``img`` have each colour listed in ``d``.

    ``img`` is converted to an array and flattened to one row per pixel (the
    last axis is kept as the colour components). ``d`` maps colour tuples to
    output keys. The result maps ``d[colour]`` to the number of pixels with
    exactly that colour; colours absent from ``d`` are ignored and colours of
    ``d`` that never occur are simply missing from the result.
    """
    pixels = np.asarray(img)
    flat = pixels.reshape(-1, pixels.shape[-1])
    distinct, occurrences = np.unique(flat, return_counts=True, axis=0)
    keyed = ((tuple(row), n) for row, n in zip(distinct, occurrences))
    return {d[color]: n for color, n in keyed if color in d}
def get_ratios(b):
    """Turn per-colour pixel counts into proportions that sum to 1.

    ``b`` maps the indices 0, 1 and 2 to pixel counts; a missing index counts
    as 0. Returns a length-3 array of ``count / total``.

    If no pixel was counted at all (total of 0) an all-zero vector is returned
    rather than dividing by zero, which would yield NaN features that silently
    corrupt everything computed from them.
    """
    bc = np.asarray([b.get(i, 0) for i in range(3)])
    total = bc.sum()
    if total == 0:
        return np.zeros(bc.shape, dtype=float)
    return bc / total
def posterize(image, colors):
    """Return a new image in which every pixel is snapped to its nearest palette colour.

    "Nearest" is the smallest squared Euclidean distance over the last axis;
    on a tie the colour that comes first in ``colors`` wins. ``image`` itself
    is not modified because the work is done on a copy of its pixel array.

    NOTE(review): assumes the last axis of the pixel array has the same length
    as each entry of ``colors`` (e.g. RGB pixels with RGB colours) - confirm
    at the call sites.
    """
    pixels = np.asarray(image).copy()
    original_shape = pixels.shape
    flat = pixels.reshape(-1, original_shape[-1])

    # One column of squared distances per palette colour.
    distances = np.asarray(
        [((flat - color) ** 2).sum(axis=1) for color in colors]
    ).T
    nearest = distances.argmin(axis=1)

    for position, color in enumerate(colors):
        flat[nearest == position] = color
    return Image.fromarray(flat.reshape(original_shape))
def trim(img, target_color, replace_color):
    """Flood-fill, in place, every region of ``target_color`` that touches the border.

    The border is walked row by row (left pixel, then right pixel) and then
    column by column (top pixel, then bottom pixel). Whenever a border pixel
    equals ``target_color``, ``ImageDraw.floodfill`` is started there with
    ``replace_color``. Returns ``None``; ``img`` is modified directly.
    """
    pixels = np.asarray(img)
    shape = pixels.shape

    def border_points():
        # (x, y) coordinates in the same order the edges are inspected.
        for row in range(shape[0]):
            yield 0, row
            yield shape[1] - 1, row
        for col in range(shape[1]):
            yield col, 0
            yield col, shape[0] - 1

    for x, y in border_points():
        if (pixels[y][x] == target_color).all():
            ImageDraw.floodfill(img, (x, y), replace_color)
            # Take a fresh array of the image so later checks see the fill.
            pixels = np.asarray(img)
def get_features(image_path, cf=contrast_factor):
    """Extract the pixel-proportion feature vector from one X-ray image.

    Pipeline: load -> contrast enhancement by factor ``cf`` -> posterize to
    ``segmented_colors`` -> flood-fill border-connected black with red ->
    count the remaining palette pixels -> convert the counts to ratios.

    The raw counts and ratios are printed and the four intermediate images
    (original, contrasted, posterized, trimmed) are displayed.

    Args:
        image_path: Path of the image file to open.
        cf: Contrast enhancement factor; defaults to ``contrast_factor``.

    Returns:
        The array produced by ``get_ratios``: one proportion per palette
        colour, ordered as defined by ``indices``.
    """
    # Normalise to 3-channel RGB up front: posterize/trim/counter compare
    # pixels against RGB tuples, so greyscale, palette or CMYK files would
    # otherwise fail. For files that are already RGB this is a plain copy.
    # The context manager releases the file handle once the pixels are loaded.
    with Image.open(image_path) as source:
        image = source.convert('RGB')
    imgs = [image.copy()]

    image = ImageEnhance.Contrast(image).enhance(cf)
    imgs.append(image.copy())

    image = posterize(image, segmented_colors)
    imgs.append(image.copy())

    # Repaint border-connected black in red so it is not counted as black.
    trim(image, black, red)
    imgs.append(image.copy())

    b = counter(image, indices)
    rb = get_ratios(b)
    print(b, rb)
    display_images(imgs)
    return rb
# Run the feature pipeline on one sample of each class. get_features prints
# the pixel counts/ratios and displays the intermediate images; the returned
# feature vectors are not kept here.
print('COVID Negative X-Ray Example')
get_features(f'D:/DATA/0/normal (56).jpeg')
print('\nCOVID Positive X-Ray Example')
get_features(f'D:/DATA/1/covid (56).jpeg')
print()
# Feature extraction from COVID Negative x-ray samples
# Reads D:/DATA/0/normal (1).jpeg ... normal (1301).jpeg and collects one
# pixel-proportion feature vector per image in neg_data.
neg_data = []
for i in tqdm(range(1, 1302)):
    # Normalise to 3-channel RGB: the steps below compare pixels against RGB
    # tuples, so greyscale/CMYK/palette files would otherwise fail. This is a
    # plain copy for files that are already RGB. The context manager releases
    # the file handle as soon as the pixels are loaded.
    with Image.open(f'D:/DATA/0/normal ({i}).jpeg') as source:
        image = source.convert('RGB')
    image = ImageEnhance.Contrast(image).enhance(contrast_factor)
    image = posterize(image, segmented_colors)
    # Repaint border-connected black in red so it is not counted as black.
    trim(image, black, red)
    counts = counter(image, indices)
    features = get_ratios(counts)
    neg_data.append(features)
    # Optional: uncomment to keep the processed image for inspection.
    # image.save(f'D:/DATA/Processed/0/normal ({i}).jpeg')
# Feature extraction from COVID Positive x-ray samples
# Reads D:/DATA/1/covid (1).jpeg ... covid (1790).jpeg and collects one
# pixel-proportion feature vector per image in pos_data.
pos_data = []
for i in tqdm(range(1, 1791)):
    # Normalise to 3-channel RGB: the steps below compare pixels against RGB
    # tuples, so greyscale/CMYK/palette files would otherwise fail. This is a
    # plain copy for files that are already RGB. The context manager releases
    # the file handle as soon as the pixels are loaded.
    with Image.open(f'D:/DATA/1/covid ({i}).jpeg') as source:
        image = source.convert('RGB')
    image = ImageEnhance.Contrast(image).enhance(contrast_factor)
    image = posterize(image, segmented_colors)
    # Repaint border-connected black in red so it is not counted as black.
    trim(image, black, red)
    counts = counter(image, indices)
    features = get_ratios(counts)
    pos_data.append(features)
    # Optional: uncomment to keep the processed image for inspection.
    # image.save(f'D:/DATA/Processed/1/covid ({i}).jpeg')
# Saving feature and target data into a CSV file
# Each row holds the feature values followed by the class label
# (0 = negative sample, 1 = positive sample). The same rows are also kept in
# `data`, which is shuffled after the file has been written.
data = []
csv_rows = []
for samples, label, suffix in ((neg_data, [0], ',0\n'), (pos_data, [1], ',1\n')):
    for v in samples:
        values = list(v)
        data.append(values + label)
        csv_rows.append(','.join(map(str, values)) + suffix)
s = ''.join(csv_rows)
with open('D:/DATA/covid-xray-features.csv', 'w') as f:
    f.write(s)
shuffle(data)
# Loading data from CSV file
# Run this if Features are already extracted
# Every line is parsed into a list of floats whose last value is the label;
# rows with label 0 go to neg_data, all other rows go to pos_data.
with open('D:/DATA/covid-xray-features.csv', 'r') as f:
    s = f.read()
data = [list(map(float, line.split(','))) for line in s.splitlines()]
neg_data = [row for row in data if row[-1] == 0]
pos_data = [row for row in data if not row[-1] == 0]
shuffle(data)

# First three columns are the features, the fourth is the target.
d = np.asarray(data)
X, y = d[:, :3], d[:, 3]
print(X, y)
# Compare the first `upto` negative and positive samples, one feature at a
# time: first a scatter plot in sample order, then a line plot of the same
# values sorted ascending.
upto = 1200
pop_a = mpatches.Patch(color='tab:green', label='Negative')
pop_b = mpatches.Patch(color='tab:red', label='Positive')
legend_handles = [pop_a, pop_b]
sample_numbers = range(1, upto+1)
neg_head = np.asarray(neg_data)[:upto]
pos_head = np.asarray(pos_data)[:upto]
for i, c in enumerate(['Black', 'Grey', 'White']):
    a, b = neg_head[:, i], pos_head[:, i]
    axis_label = f'{c} Pixel Proportion'

    # Raw values in sample order.
    plt.scatter(sample_numbers, a, color='tab:green')
    plt.scatter(sample_numbers, b, color='tab:red')
    plt.legend(handles=legend_handles)
    plt.ylabel(axis_label)
    plt.show()

    # Same values sorted, to compare the two distributions.
    plt.plot(sample_numbers, sorted(a), color='tab:green')
    plt.plot(sample_numbers, sorted(b), color='tab:red')
    plt.legend(handles=legend_handles)
    plt.ylabel(axis_label)
    plt.show()
# Split dataset for training & testing
# 30% of the samples are held out for testing; random_state is fixed so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=120)
print('Size of Training set:', len(X_train))
print('Size of Testing set:', len(X_test))
class LogisticRegressionModel():
    """Logistic-regression classifier trained with batch gradient descent.

    Training runs inside the constructor: weights and bias start at zero and
    ``update_weights`` is applied ``iterations`` times on the full data set.

    Attributes set by the constructor:
        learning_rate: Step size of every gradient update.
        m, n: Number of rows and columns of ``X``.
        W, b: Learned weight vector (length ``n``) and bias.
        X, y: The training data as passed in.
    """

    def __init__(self, X, y, learning_rate, iterations):
        """Store the training data and fit the model.

        NOTE(review): ``y`` is presumably a vector of 0/1 labels with one
        entry per row of ``X`` - confirm at the call site.
        """
        self.learning_rate = learning_rate
        self.m, self.n = X.shape
        self.W = np.zeros(self.n)
        self.b = 0
        self.X = X
        self.y = y
        for _ in range(iterations):
            self.update_weights()

    def _sigmoid_output(self, X):
        """Return the sigmoid of the linear score ``X.W + b`` for every row of ``X``."""
        return 1/(1+np.exp(-(X.dot(self.W)+self.b)))

    def update_weights(self):
        """Perform one gradient-descent step on the stored training data."""
        activations = self._sigmoid_output(self.X)
        # Prediction error per sample, flattened to length m.
        residual = np.reshape(activations - self.y.T, self.m)
        grad_w = np.dot(self.X.T, residual)/self.m
        grad_b = np.sum(residual)/self.m
        self.W = self.W - self.learning_rate * grad_w
        self.b = self.b - self.learning_rate * grad_b

    def predict(self, X):
        """Return 1 where the sigmoid output is strictly above 0.5, else 0."""
        return np.where(self._sigmoid_output(X) > 0.5, 1, 0)
# Train the hand-written logistic regression and evaluate it on the test set.
model = LogisticRegressionModel(X_train, y_train, learning_rate = 0.2, iterations = 10000)
print('Weights:', model.W)
y_pred = model.predict(X_test)
print('Accuracy:', accuracy_score(y_test, y_pred))
print('\nClassification Report:\n\n', classification_report(y_test, y_pred))
c_m = confusion_matrix(y_test, y_pred)
# confusion_matrix orders its rows/columns by sorted label value, so index 0
# is class 0 (negative) and index 1 is class 1 (positive). The tick labels
# must follow that order, otherwise the heatmap is labelled the wrong way round.
target_names = ['NEGATIVE', 'POSITIVE']
h_m = heatmap(data=c_m, annot=True, fmt='g', xticklabels=target_names, yticklabels=target_names)
h_m.set(xlabel='PREDICTED COVID RESULTS', ylabel='ACTUAL DATA')
plt.show()
# Train scikit-learn's LogisticRegression on the same split for comparison.
clf = LogisticRegression(max_iter=10000).fit(X_train, y_train)
y_pred = clf.predict(X_test)
print('Accuracy:', accuracy_score(y_test, y_pred))
print('\nClassification Report:\n\n', classification_report(y_test, y_pred))
c_m = confusion_matrix(y_test, y_pred)
# confusion_matrix orders its rows/columns by sorted label value, so index 0
# is class 0 (negative) and index 1 is class 1 (positive). The tick labels
# must follow that order, otherwise the heatmap is labelled the wrong way round.
target_names = ['NEGATIVE', 'POSITIVE']
h_m = heatmap(data=c_m, annot=True, fmt='g', xticklabels=target_names, yticklabels=target_names)
h_m.set(xlabel='PREDICTED COVID RESULTS', ylabel='ACTUAL DATA')
plt.show()
def predict_from_image(image_path, sklearn_model, custom_model, target_classes):
    """Classify one image with both models and print each verdict.

    The feature vector is extracted once with ``get_features`` (which also
    prints and displays its intermediate results) and passed as a single-row
    batch to ``sklearn_model.predict`` and then ``custom_model.predict``.
    Each predicted label is converted to ``int`` and used as an index into
    ``target_classes``. Nothing is returned.
    """
    features = np.asarray([get_features(image_path)])
    labelled_models = (
        ('SKLearn model result:', sklearn_model),
        ('Custom model result:', custom_model),
    )
    for caption, estimator in labelled_models:
        res = estimator.predict(features)[0]
        print(caption, target_classes[int(res)])
# Manual check with one image per class loaded from D:/Downloads; both
# trained models classify each image. The class list is indexed by the
# predicted label, so index 0 is NEGATIVE and index 1 is POSITIVE.
print('Test with train & test excluded samples')
print('\nTesting a COVID Negative X-Ray image')
predict_from_image('D:/Downloads/normal.jpg', clf, model, ['NEGATIVE', 'POSITIVE'])
print('\nTesting a COVID Positive X-Ray image')
predict_from_image('D:/Downloads/covid.jpg', clf, model, ['NEGATIVE', 'POSITIVE'])